import requests;
from lxml import etree;

# A small sample HTML document: a <ul> holding five "item-*" <li> entries
# (each wrapping a link), one "else-1" <li>, and a bare text node.
html = '''
<html>
	<div>
		<ul>
			<li class="item-0">
				<a href="link1.html">first item</a>
			</li>
			<li class="item-1">
				<a href="link2.html">second item</a>
			</li>	
			<li class="item-active">
				<a href="link3.html">third item</a>
			</li>	
			<li class="item-1">
				<a href="link4.html">fourth item</a>
			</li>			
			<li class="item-0">
				<a href="link5.html">fifth item</a>
			</li>
			<li class="else-1">something else</li>		
			this is ul item
		</ul>				
	</div>
</html>
'''

# Parse the HTML string into an lxml element tree.
selector = etree.HTML(html)

# Select every <li> whose class attribute starts with "item-".
all_item = selector.xpath("//li[starts-with(@class,'item-')]")
print(all_item)

# Run a second, relative XPath on each matched <li> to reach the <a>
# directly beneath it and collect that link's text.
item_a = []  # text of the <a> tag under each matched <li>
for li in all_item:
    link_texts = li.xpath('a/text()')
    # xpath() returns an empty list when the <li> has no direct <a> text
    # child; skip such entries instead of raising IndexError on [0].
    if link_texts:
        item_a.append(link_texts[0])

print(item_a)